import csv
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import sklearn
import plotly.express as px
import plotly.figure_factory as ff
import plotly.offline as pyo
from IPython.display import Image
import math
from itables import init_notebook_mode
# Default figure size for seaborn/matplotlib plots: 11.7 x 8.27 inches.
sns.set(rc={'figure.figsize':(11.7,8.27)})
# Initialise plotly's offline notebook mode so figures can render inline.
pyo.init_notebook_mode()
# Switch on itables with all_interactive=True, so DataFrames shown in the
# notebook are rendered as interactive tables.
init_notebook_mode(all_interactive=True)
# Load the dataset; the path is relative to the current working directory.
df = pd.read_csv('./dinosaur_cleaned.csv')
# Bare expression: displays the loaded frame when it ends a notebook cell.
df
| name | diet | period | lived_in | type | length | taxonomy | named_by | species | link |
|---|---|---|---|---|---|---|---|---|---|
| Loading... (need help?) |
# Draw DataFrame.hist() with default settings and keep its return value.
histograms = df.hist()
%%html
<style>
/* Hide every <div class="input"> element. Presumably these are the notebook's
   code-input areas; confirm against the front end used to render it. */
div.input {
display:none;
}
</style>
# Length against period, coloured by diet; hovering a point shows its name.
px.scatter(df, x = 'period', y = 'length', color = 'diet', hover_data = ['name'])
# Same axes, coloured by dinosaur type instead.
px.scatter(df, x = 'period', y = 'length', color = 'type', hover_data = ['name'])
# Same axes without colouring, plus an OLS trendline forced to dark blue.
px.scatter(df, x = 'period', y= 'length', trendline = 'ols', trendline_color_override = 'darkblue')
# Embed the image file google.png in the notebook output.
Image('google.png')
# Categorical plots of length, first grouped by diet and then by type. Each
# grouping is drawn twice: with catplot's default kind and as a box plot.
# All four figures share the same height and aspect ratio.
catplot_size = {'height': 8.27, 'aspect': 11.7/8.27}

diet_vs_length_scatter = sns.catplot(data=df, x='diet', y='length', **catplot_size)
diet_vs_length_box = sns.catplot(data=df, x='diet', y='length', kind='box', **catplot_size)
type_vs_length_cat = sns.catplot(data=df, x='type', y='length', **catplot_size)
type_vs_length_box = sns.catplot(data=df, x='type', y='length', kind='box', **catplot_size)
# Embed the image file formula.png in the notebook output.
Image('formula.png')
# Number of rows per distinct value of the `diet` column.
# NOTE(review): the call appears twice; the repeat looks like a notebook-export
# artifact -- confirm before removing it.
df['diet'].value_counts()
df['diet'].value_counts()
| diet | |
|---|---|
| Loading... (need help?) |
# Summary statistics of `length` for the two diet groups; these are the inputs
# to the z statistic computed further down.
#   x = mean, s = standard deviation (pandas default, ddof=1),
#   n = number of non-missing lengths.
# Each group is selected once and reused instead of re-filtering the frame for
# every statistic.
herbivore_lengths = df.loc[df['diet'] == 'herbivorous', 'length']
carnivore_lengths = df.loc[df['diet'] == 'carnivorous', 'length']

herbivore_mean_x1 = herbivore_lengths.mean()
herbivore_std_dev_s1 = herbivore_lengths.std()
herbivore_count_n1 = herbivore_lengths.count()

carnivore_mean_x2 = carnivore_lengths.mean()
carnivore_std_dev_s2 = carnivore_lengths.std()
carnivore_count_n2 = carnivore_lengths.count()

# Hypothesised difference between the two group means.
difference_d0 = 0
# Echo the six group statistics, one "<label> = <value>" line each, under an
# "OUTPUT:" header.
print("OUTPUT:")
for stat_label, stat_value in (
    ("x1", herbivore_mean_x1),
    ("s1", herbivore_std_dev_s1),
    ("n1", herbivore_count_n1),
    ("x2", carnivore_mean_x2),
    ("s2", carnivore_std_dev_s2),
    ("n2", carnivore_count_n2),
):
    print(stat_label + " = " + str(stat_value))
OUTPUT: x1 = 8.899378881987579 s1 = 7.226483384869934 n1 = 161 x2 = 5.220222222222222 s2 = 3.9054982785804686 n2 = 90
# Two-sample z statistic for the difference between the group means:
#     z = ((x1 - x2) - d0) / sqrt(s1**2 / n1 + s2**2 / n2)
numerator = (herbivore_mean_x1 - carnivore_mean_x2) - difference_d0
# The standard error is built from the sample *variances* (s squared) divided
# by the sample sizes; dividing the standard deviations themselves
# understates the error and inflates z.
denominator = math.sqrt(
    herbivore_std_dev_s1 ** 2 / herbivore_count_n1
    + carnivore_std_dev_s2 ** 2 / carnivore_count_n2
)
z = numerator / denominator
print('z = ' + str(z))
z = 12.38279139344993
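Since P(Z > 12.38) < 0.05, we reject the null hypothesis that the mean lengths of the two groups are equal at the 5% significance level.
This means that herbivores are, on average, larger than carnivores.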
# Replace each `named_by` entry with its slice [-5:-1], i.e. the four
# characters that precede the final character (presumably the year inside a
# trailing "(YYYY)" -- verify against the data).
# A single vectorised .str slice transforms every row exactly once. It avoids
# chained assignment (df[col].loc[...] = ...), which may write to a temporary
# copy, and avoids mutating the column while iterating over it. Missing values
# stay missing.
df['named_by'] = df['named_by'].str[-5:-1]
# Bare expression: displays the updated frame when it ends a notebook cell.
df
| name | diet | period | lived_in | type | length | taxonomy | named_by | species | link |
|---|---|---|---|---|---|---|---|---|---|
| Loading... (need help?) |
# 3-D scatter with period on x, length on y and the (rewritten) named_by
# column on z; hovering a point shows its name.
px.scatter_3d(df, x = 'period', y = 'length', z = 'named_by', hover_data = ['name'])